* Code for assigning stocks into clusters based on the similarity in their
* fundamentals
*
* The fundamental information I consider is the same as that underlying the
* five-factor model: size, book-to-market, asset growth, and profitability
*
* I do not want to include any return-based information, such as momentum
*


* Alternative characteristics data
*
* Builds the firm-month characteristics file that is handed to Python for
* clustering: load the characteristics, require the four clustering
* characteristics to be non-missing, hold them at their June values, and
* export the result as a CSV.
use "~/Documents/CRSP-Compustat/FMcharacteristics.dta", clear

* NOTE(review): ltrev, idiosyncraticvol and marketbeta look return-based by
* name; presumably the Python side does not cluster on them -- confirm.
keep permno yyyymm exchcd size booktomarket ffprofitability assetgrowth ltrev idiosyncraticvol qmj marketbeta

keep if ~missing(size, booktomarket, ffprofitability, assetgrowth)

* keep accounting-based characteristics at their June values
* this only applies to size; everything else is already done this way
gen long dateindex = (trunc(yyyymm/100) - 1900) * 12 + mod(yyyymm, 100)

* howoldrank = months elapsed since the firm's latest June observation
* (0 in June itself, missing until the firm's first June). It depends only
* on permno and the calendar month, not on the characteristic being filled,
* so it is computed once here instead of once per characteristic.
sort permno dateindex
qui gen howoldrank = 0 if mod(yyyymm,100)==6
qui replace howoldrank = howoldrank[_n-1] + (dateindex-dateindex[_n-1]) if permno==permno[_n-1] & missing(howoldrank)

foreach predictor in "size" "booktomarket" "ffprofitability" "assetgrowth" {

	* carry the June value forward for at most 11 months; a missing
	* howoldrank fails the <=11 test, so rows before the first June keep
	* their own values
	qui replace `predictor' = `predictor'[_n-1] if permno==permno[_n-1] & howoldrank>0 & howoldrank<=11

	}

drop howoldrank dateindex

* export to csv file for doing clusters in Python
* NOTE(review): this writes to ".../Factor Momentum/Python" while the cluster
* files are later imported from ".../Factor Momentum 2/Python" -- confirm the
* two folders are meant to differ.
sort permno yyyymm

export delimited using "~/Dropbox/Research/Factor Momentum/Python/chardata.csv", replace delimiter(",")
	




*===============================================================================
* SMALL CLUSTERS
*===============================================================================

* Stage the cluster assignments produced in Python as a .dta file so that they
* can be merged onto the return panel by permno-yyyymm
import delimited using "~/Dropbox/Research/Factor Momentum 2/Python/clusters_small.csv", clear delimiter(",")
sort permno yyyymm
save "~/Documents/cclusters.dta", replace

* Return panel that the cluster labels are attached to

use "~/Dropbox/Research/Data/CRSP-Compustat/merged_crsp_compustat.dta", clear

qui drop if yyyymm<196306

drop if size_cat<2

keep permno yyyymm me retnm r1_1 mgindustry
rename (retnm r1_1) (ret Lret)

sort permno yyyymm

* keep every return observation; cluster columns are missing where the
* cluster file has no row for that permno-yyyymm
merge 1:1 permno yyyymm using "~/Documents/cclusters.dta", nogenerate keep(1 3)

* forward fill cluster assignments
* (an assignment is carried forward within a firm for at most 11 months)
gen long dateindex = (trunc(yyyymm/100) - 1900) * 12 + mod(yyyymm, 100)

foreach cluster_def in "kmeans10" "kmeans25" "kmeans50" "kmeans100" "ward10" "ward25" "ward50" "ward100" {

	* howoldrank = months since the firm's last observed assignment
	* (0 where an assignment exists, missing before the first one)
	qui gen howoldrank = cond(missing(`cluster_def'), ., 0)
	qui bysort permno (dateindex): replace howoldrank = howoldrank[_n-1] + (dateindex - dateindex[_n-1]) if missing(howoldrank)

	* fill only the gaps, and only from a non-missing predecessor
	qui by permno: replace `cluster_def' = `cluster_def'[_n-1] if missing(`cluster_def') & ~missing(`cluster_def'[_n-1]) & inrange(howoldrank, 1, 11)

	drop howoldrank

	}

* discard rows that have no assignment under any of the eight definitions
drop if missing(kmeans10) & missing(kmeans25) & missing(kmeans50) & missing(kmeans100) & missing(ward10) & missing(ward25) & missing(ward50) & missing(ward100)

save "~/Documents/return_clusters.dta", replace


*===============================================================================
* BIG CLUSTERS (monthly)
*===============================================================================

* Read BIG clusters from Python/chardata
* and keep only the rows that carry at least one kmeans*/ward* assignment
import delimited using "~/Dropbox/Research/Factor Momentum 2/Python/clusters_big.csv", clear delimiter(",")
egen n_assigned = rownonmiss(kmeans* ward*)
drop if n_assigned == 0
drop n_assigned
sort permno yyyymm
save "~/Documents/cclusters.dta", replace

* Return panel that the cluster labels are attached to

use "~/Dropbox/Research/Data/CRSP-Compustat/merged_crsp_compustat.dta", clear

qui drop if yyyymm<196306

drop if size_cat<2

keep permno yyyymm me retnm r1_1
rename (retnm r1_1) (ret Lret)

* keep every return observation; cluster columns are missing where the
* cluster file has no row for that permno-yyyymm
merge 1:1 permno yyyymm using "~/Documents/cclusters.dta", nogenerate keep(1 3)

* no forward fill in this section: rows without any assignment are dropped
egen n_assigned = rownonmiss(kmeans* ward*)
drop if n_assigned == 0
drop n_assigned
sort permno yyyymm

save "~/Documents/return_clusters_big.dta", replace




*===============================================================================
* Construct momentum strategies for Fundamental clusters 
*===============================================================================

* big selects the input panel: 0 reads return_clusters.dta, anything else
* reads return_clusters_big.dta
local big = 1
* ctr counts the cluster definitions actually processed; the first one starts
* a fresh output file and every later one is merged into it
local ctr = 0

foreach p in "10" "25" "50" "100" {

	foreach technique in "kmeans" "ward" {

		local cdef "`technique'`p'"

		* kmeans10 and kmeans100 are skipped when working with the big clusters
		if `big' == 1 & inlist("`cdef'", "kmeans10", "kmeans100") {
			continue
		}

		local ++ctr

		* input panel, name of the factor column, and output file
		if `big' != 0 {
			use "~/Documents/return_clusters_big.dta", clear
			local factor "`cdef'_big"
			local outfile "~/Documents/CRSP-Compustat/characteristicmomentum_big.dta"
		}
		else {
			use "~/Documents/return_clusters.dta", clear
			local factor "`cdef'"
			local outfile "~/Documents/CRSP-Compustat/characteristicmomentum.dta"
		}

		gen ccluster = `cdef'
		keep if ~missing(ccluster)

		* require at least 10 stocks in a cluster-month
		bysort yyyymm ccluster: egen nc = count(permno)
		keep if nc>=10

		* roll yyyymm forward by a month (so that yyyymm is from the viewpoint of returns)
		* January-November: +1; December: +89 (YYYY12 -> (YYYY+1)01)
		qui replace yyyymm = cond(mod(yyyymm,100)<12, yyyymm + 1, cond(mod(yyyymm,100)==12, yyyymm + 89, .))

		drop if missing(me,ret,Lret)

		* value-weighted cluster returns: sum(me*ret)/sum(me) per cluster-month
		gen double me_x_ret = me * ret
		gen double me_x_Lret = me * Lret

		collapse (sum) me_x_ret me_x_Lret me, by(yyyymm ccluster)

		gen double ret = me_x_ret / me
		gen double Lret = me_x_Lret / me

		keep yyyymm ccluster ret Lret

		save "~/Documents/ccluster_returns.dta", replace


		* FMB regressions
		* month-by-month cross-sectional regression of ret on Lret, followed
		* by a t-test on the time series of slopes; the data in memory is
		* exactly what was just saved, so no reload is needed here

		qui statsby _b aR2=e(r2_a), by(yyyymm) clear: regress ret Lret

		ttest _b_Lret==0


		* Construct characteristic momentum 1/1 factors 
		* clusters above the monthly median of Lret (portfolio 3) minus
		* clusters below it (portfolio 1); clusters at the median are dropped
		use "~/Documents/ccluster_returns.dta", clear

		bysort yyyymm: egen m = median(Lret)

		drop if missing(m,Lret,ret)
		drop if Lret == m
		qui gen portfolio = cond(Lret<m, 1, 3)

		collapse (mean) ret, by(yyyymm portfolio)

		reshape wide ret, i(yyyymm) j(portfolio)

		qui gen `factor' = ret3 - ret1

		keep yyyymm `factor'

		* append this factor to the ones already stored during this run
		if `ctr'>1 {
			merge 1:1 yyyymm using "`outfile'", nogenerate
			}
		qui save "`outfile'", replace


	}
}
	
	
*===============================================================================
* Size, book-to-market, and size x book-to-market momentums of Lewellen (2002)
*===============================================================================

* ctr counts the portfolio sets processed; the first one starts a fresh
* output file and every later one is merged into it
local ctr = 0
local outfile "~/Documents/CRSP-Compustat/lewellenmomentum.dta"

foreach type in "sz_" "bm_" "sz_bm_" {

	local ++ctr

	use "~/Dropbox/Research/Anomalies/Fama-French factors/ffportfolios.dta", clear

	* columns named <type> followed by one or two characters
	keep yyyymm `type'? `type'??

	* one row per portfolio-month
	reshape long `type', i(yyyymm) j(id)
	rename `type' ret

	* prior-month return of each portfolio, via a gap-free month counter
	qui gen monthindex = (trunc(yyyymm/100) - 1900) * 12 + mod(yyyymm,100)

	qui xtset id monthindex
	gen Lret = L.ret

	drop monthindex
	drop if yyyymm<196307

	* portfolios above the monthly median of Lret (portfolio 3) minus
	* portfolios below it (portfolio 1); portfolios at the median are dropped
	bysort yyyymm: egen m = median(Lret)

	drop if missing(m,Lret,ret)
	drop if Lret == m
	qui gen portfolio = cond(Lret<m, 1, 3)

	collapse (mean) ret, by(yyyymm portfolio)

	reshape wide ret, i(yyyymm) j(portfolio)

	qui gen `type'mom = ret3 - ret1

	keep yyyymm `type'mom

	* append this factor to the ones already stored during this run
	if `ctr'>1 {
		merge 1:1 yyyymm using "`outfile'", nogenerate
		}
	qui save "`outfile'", replace

	}
	
